Fonctions, variables

# W O R K I N G  D I R E C T O R Y
main.dir = "/shared/projects/chrom_enhancer_bc"
setwd(main.dir)

data.dir = file.path(main.dir,"data/interaction/rdata/")

# I M P O R T
source("./scripts/fun_interactions.r")

load(file.path(data.dir,"/3div_0.05.Rdata"))
load(file.path(data.dir,"/raw_BENGI_interactions_POLR3G.RData"))

data = File_list_filtered0.05
rm(File_list_filtered0.05)
data_05 = lapply(data, hic_format )
data_01 = lapply(data_05, function(x) filter_pvalue(df=x, seuil = 2))


heatmap_range = GRanges(
  seqnames = Rle("chr5"),
  IRanges(89768410-2e+6, 89777404+2e+6)
)

# bins of 8kb
binsize = 8000
# bins of 8kb for heatmap visualisations center around polr3g bait
borne_inf = seq(89768410-2e+6 , 89777404+2e+6, by = binsize)
borne_sup = seq(89768410-2e+6+binsize-1 , 89777404+2e+6+binsize-1, by = binsize)

# Subset to keep chr5 (faster computation)
chr5 = lapply(data_05, function(x) subset_chr(x, "chr5"))

# Creating the list of subsetted dataframe around the interval 
range_4MB = lapply(chr5, function(x) subset_by_interval(x,89768410-2e+6, 89777404+2e+6) )

# add empty interactions (diagonal) for heatmap visualisation
diagonal = data.frame("frag1"=paste0(borne_inf,"-",borne_sup), 
                      "frag2"=paste0(borne_inf,"-",borne_sup))

# Adding "diagonal" values in each dataframe of the list
range_4MB = lapply(range_4MB, function(x) rbind(dplyr::select(x,frag1,frag2), diagonal))


# transform it into a single dataframe
range_4MB = ldply(range_4MB)

# bins into dataframe for easier steps
bornes = data.frame("start"=borne_inf, "end"=borne_sup)

# correspondence between bins and frag1 and 2 chromosomal positions
include_in_bornes = get_position_inside_bins(range_4MB,bornes, 8000, "frag1")
include_in_bornes2 = get_position_inside_bins(range_4MB,bornes, 8000, "frag2")


# Merge all the bins interactions
tmp = merge(dplyr::select(range_4MB,frag1,frag2),include_in_bornes, by.x = "frag1", by.y = ".id") 
interactions = merge(tmp, include_in_bornes2, by.x = "frag2", by.y = ".id")


# Middle position of the bin
interactions = interactions %>% mutate(
  frag1 = (start.x + end.x )/ 2,
  frag2 = (start.y + end.y )/ 2
) %>% dplyr::select(frag1, frag2)


# Symetric matrix :
interactions_reverse = filter(interactions, frag1 != frag2)
interactions_reverse = interactions_reverse %>% mutate(
  frag1_r = frag2,
  frag2_r = frag1
  ) %>% dplyr::select(frag1_r, frag2_r)

colnames(interactions_reverse) = c("frag1","frag2")
interactions = rbind(interactions, interactions_reverse)

# To create square matrix
mat = as.matrix(xtabs(~frag1+frag2,interactions))

Heatmap d’interactions (p < 0.05)

pheatmap(log2(mat+1),
         cluster_cols = FALSE, 
         cluster_rows = FALSE,
         show_rownames = TRUE, 
         show_colnames = TRUE,
         scale = "none",
         #breaks = breaks,
         #inferno(8, direction = -1)
         brewer.pal(8, name = "YlOrRd"),
         fontsize = 12,
         main= "Contact heatmap center on POLR3G bait (87-91 Mbon chr5, p<0.05)")  

polr3g_exact = GRanges(
  seqnames = Rle("chr5"),
  IRanges(89768410, 89777404)
)

interaction_polr3g = 
  lapply(data_05, function(x) subset_by_overlap(ROI = polr3g_exact, df = x))
interaction_polr3g = ldply(interaction_polr3g)

# Color scale 3div
interactions = data.frame(table(interaction_polr3g$interaction_polr3g))
colors = viridis(max(interactions$Freq), 
                 direction = -1)[cut(interactions$Freq, max(interactions$Freq))] 

inf = seq(88000000,91000000,by = (91000000-88000000)/15)
sup = seq(88190000,91000000,by = (91000000-88000000)/15)
bornes = paste0(inf[1:15],"-",sup[1:15])
color_scale = GRanges(bornes, seqnames = Rle("chr5"))


# color scale BENGI
load(file.path(data.dir,"/raw_BENGI_interactions_POLR3G.RData"))
raw_bengi_polr3g_df = ldply(raw_bengi_polr3g)
polr3g_BENGI = data.frame(table(raw_bengi_polr3g_df$polr3g_interaction))

bengi = GRanges(polr3g_BENGI$Var1, seqnames = Rle("chr5"))
range_bengi = GRanges(
  seqnames = Rle("chr"),
  IRanges(polr3g_BENGI$Var1)
)

colors_bengi = viridis(max(polr3g_BENGI$Freq), 
                 direction = -1)[cut(polr3g_BENGI$Freq,max(polr3g_BENGI$Freq))] 


inf_2 = seq(88000000,91000000,by = (91000000-88000000)/max(polr3g_BENGI$Freq))
sup_2 = seq(89450000,91000000,by = (91000000-88000000)/max(polr3g_BENGI$Freq))
bornes_2 = paste0(inf[1:max(polr3g_BENGI$Freq)],"-",sup[1:max(polr3g_BENGI$Freq)])
color_scale_2 = GRanges(bornes_2, seqnames = Rle("chr5"))







ref = GRanges(
  unique(interaction_polr3g$interaction_polr3g),
  seqnames = Rle("chr5"))  

atrack <- AnnotationTrack(ref, name = "3div", fill = colors, col = NULL)
atrack_bengi = AnnotationTrack(bengi, name = "BENGI",col = NULL, fill = colors_bengi)
atrack_scale = AnnotationTrack(color_scale, col = NULL, 
                               fill =viridis(max(interactions$Freq), direction = -1),
                               showFeatureId=TRUE, name = "colors_scale 3_div",
                               id = c(1:max(interactions$Freq))) 

atrack_scale_bengi = AnnotationTrack(color_scale_2, col = NULL, 
                               fill =viridis(max(polr3g_BENGI$Freq), direction = -1),
                               showFeatureId=TRUE, name = "colors_scale bengi",
                               id = c(1:2)) 






gtrack <- GenomeAxisTrack(IRanges(start = 89768410 - 2000000, end = 89777404 + 2000000))
itrack <- IdeogramTrack(genome="hg19", 
                        chromosome="chr5", 
                        from =89768410 - 2000000,
                        to=89777404 + 2000000)
## Warning in eval(expression, envir = callEnv): input string ' ' cannot be
## translated to UTF-8, is it valid in 'ANSI_X3.4-1968'?
## Warning in eval(expression, envir = callEnv): input string ' ' cannot be
## translated to UTF-8, is it valid in 'ANSI_X3.4-1968'?
## Warning in eval(expression, envir = callEnv): input string ' ' cannot be
## translated to UTF-8, is it valid in 'ANSI_X3.4-1968'?
## Warning in eval(expression, envir = callEnv): input string ' ' cannot be
## translated to UTF-8, is it valid in 'ANSI_X3.4-1968'?
## Warning in eval(expression, envir = callEnv): input string ' ' cannot be
## translated to UTF-8, is it valid in 'ANSI_X3.4-1968'?
## Warning in eval(expression, envir = callEnv): input string ' ' cannot be
## translated to UTF-8, is it valid in 'ANSI_X3.4-1968'?
gtrack <- GenomeAxisTrack(IRanges(start = 89768410 - 2000000, end = 89777404 + 2000000))
itrack <- IdeogramTrack(genome="hg19", 
                        chromosome="chr5", 
                        from =89768410 - 2000000,
                        to=89777404 + 2000000)

ht = HighlightTrack(trackList = 
                      list(atrack_scale, itrack, gtrack,atrack, atrack_bengi,atrack_scale_bengi),
                      start = 89768410, end = 89777404, name = "POLR3G_Bait")

plotTracks(list(ht))
## Warning in seq_len(start(y)): first element used of 'length.out' argument
## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used

## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used

Avec p<0.01

# Subset to keep chr5 (faster computation)
chr5 = lapply(data_01, function(x) subset_chr(x, "chr5"))

# Creating the list of subsetted dataframe around the interval 
range_4MB = lapply(chr5, function(x) subset_by_interval(x,89768410-2e+6, 89777404+2e+6) )

# add empty interactions (diagonal) for heatmap visualisation
diagonal = data.frame("frag1"=paste0(borne_inf,"-",borne_sup), 
                      "frag2"=paste0(borne_inf,"-",borne_sup))

# Adding "diagonal" values in each dataframe of the list
range_4MB = lapply(range_4MB, function(x) rbind(dplyr::select(x,frag1,frag2), diagonal))


# transform it into a single dataframe
range_4MB = ldply(range_4MB)

# bins into dataframe for easier steps
bornes = data.frame("start"=borne_inf, "end"=borne_sup)

# correspondence between bins and frag1 and 2 chromosomal positions
include_in_bornes = get_position_inside_bins(range_4MB,bornes, 8000, "frag1")
include_in_bornes2 = get_position_inside_bins(range_4MB,bornes, 8000, "frag2")


# Merge all the bins interactions
tmp = merge(dplyr::select(range_4MB,frag1,frag2),include_in_bornes, by.x = "frag1", by.y = ".id") 
interactions = merge(tmp, include_in_bornes2, by.x = "frag2", by.y = ".id")


# Middle position of the bin
interactions = interactions %>% mutate(
  frag1 = (start.x + end.x )/ 2,
  frag2 = (start.y + end.y )/ 2
) %>% dplyr::select(frag1, frag2)


# Symetric matrix :
interactions_reverse = filter(interactions, frag1 != frag2)
interactions_reverse = interactions_reverse %>% mutate(
  frag1_r = frag2,
  frag2_r = frag1
  ) %>% dplyr::select(frag1_r, frag2_r)

colnames(interactions_reverse) = c("frag1","frag2")
interactions = rbind(interactions, interactions_reverse)

# To create square matrix
mat = as.matrix(xtabs(~frag1+frag2,interactions))
pheatmap(log2(mat+1),
         cluster_cols = FALSE, 
         cluster_rows = FALSE,
         show_rownames = TRUE, 
         show_colnames = TRUE,
         scale = "none",
         #breaks = breaks,
         #inferno(8, direction = -1)
         brewer.pal(8, name = "YlOrRd"),
         fontsize = 12,
         main= "Contact heatmap center on POLR3G bait (87-91 Mbon chr5, p<0.01)")  

polr3g_exact = GRanges(
  seqnames = Rle("chr5"),
  IRanges(89768410, 89777404)
)

interaction_polr3g = 
  lapply(data_01, function(x) subset_by_overlap(ROI = polr3g_exact, df = x))

interaction_polr3g = ldply(interaction_polr3g)

# colors scale
interactions = data.frame(table(interaction_polr3g$interaction_polr3g))
colors = viridis(max(interactions$Freq), 
                 direction = -1)[cut(interactions$Freq,max(interactions$Freq))] 

colors_bengi = viridis(max(polr3g_BENGI$Freq), 
                 direction = -1)[cut(polr3g_BENGI$Freq,max(polr3g_BENGI$Freq))] 

inf = seq(88000000,91000000,by = (91000000-88000000)/max(interactions$Freq))
sup = seq(88290000,91000000,by = (91000000-88000000)/max(interactions$Freq))
bornes = paste0(inf[1:max(interactions$Freq)],"-",sup[1:max(interactions$Freq)])
color_scale = GRanges(bornes, seqnames = Rle("chr5"))

inf_2 = seq(88000000,91000000,by = (91000000-88000000)/max(polr3g_BENGI$Freq))
sup_2 = seq(89450000,91000000,by = (91000000-88000000)/max(polr3g_BENGI$Freq))
bornes_2 = paste0(inf[1:max(polr3g_BENGI$Freq)],"-",sup[1:max(polr3g_BENGI$Freq)])
color_scale_2 = GRanges(bornes_2, seqnames = Rle("chr5"))


ref = GRanges(
  unique(interaction_polr3g$interaction_polr3g),
  seqnames = Rle("chr5"))  

atrack <- AnnotationTrack(ref, name = "3div", fill = colors, col = NULL)
atrack_bengi = AnnotationTrack(bengi, name = "BENGI",col = NULL, fill = colors_bengi)
atrack_scale = AnnotationTrack(color_scale, col = NULL, 
                               fill =viridis(max(interactions$Freq), direction = -1),
                               showFeatureId=TRUE, name = "colors_scale 3_div",
                               id = c(1:max(interactions$Freq))) 

atrack_scale_bengi = AnnotationTrack(color_scale_2, col = NULL, 
                               fill =viridis(max(polr3g_BENGI$Freq), direction = -1),
                               showFeatureId=TRUE, name = "colors_scale bengi",
                               id = c(1:2)) 


gtrack <- GenomeAxisTrack(IRanges(start = 89768410 - 2000000, end = 89777404 + 2000000))
itrack <- IdeogramTrack(genome="hg19", 
                        chromosome="chr5", 
                        from =89768410 - 2000000,
                        to=89777404 + 2000000)

ht = HighlightTrack(trackList = 
                      list(atrack_scale, itrack, gtrack,atrack, atrack_bengi,atrack_scale_bengi),
                      start = 89768410, end = 89777404, name = "POLR3G_Bait")

plotTracks(list(ht))
## Warning in seq_len(start(y)): first element used of 'length.out' argument
## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used

## Warning in start(y):end(y): numerical expression has 2 elements: only the first
## used